library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(cluster)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("leaflet")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(leaflet)
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("ggmap")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("sf")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("osmdata")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(osmdata)
## Data (c) OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("rosm")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(rosm)
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("ggspatial")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(ggspatial)
options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("prettymapr")
##
## The downloaded binary packages are in
## /var/folders/l9/spktl4tx5_jffrl9r24cgl4w0000gn/T//RtmpNMhcm1/downloaded_packages
library(prettymapr)
##
## Attaching package: 'prettymapr'
##
## The following objects are masked from 'package:rosm':
##
## makebbox, zoombbox
##
## The following objects are masked from 'package:ggmap':
##
## clear_geocode_cache, geocode
zaehlstellen <- read_csv("data/dauerzaehlstellen_location.csv")
## Rows: 87 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): ZNAME, STRNR, RICHTUNG_1, RICHTUNG_2, BEZIRK_NAME
## dbl (6): ZNR, LONGITUDE, LATITUDE, BEZIRK_PLZ, BEZIRK_NR, BEZIRK_CODE
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
verkehr <- read_csv("data/dauerzaehlstellen_data.csv")
## Rows: 40418 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): ZNAME, STRTYP, RINAME, FZTYP
## dbl (11): ZNR, STRNR, DTVMS, DTVMF, DTVMO, DTVDD, DTVFR, DTVSA, DTVSF, TVMA...
## date (2): DATUM, TVMAXT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Nur Einträge mit RINAME == "Gesamt" => Auffahrten und Ausfahrten werden nicht berücksichtigt
gesamt <- verkehr %>% filter(RINAME == "Gesamt")
gesamt_kfz <- gesamt %>% filter(FZTYP == "Kfz")
gesamt_lkw <- gesamt %>% filter(FZTYP == "LkwÄ")
gesamt_kfz$ZNR <- as.integer(gesamt_kfz$ZNR)
gesamt_lkw$ZNR <- as.integer(gesamt_lkw$ZNR)
zaehlstellen$ZNR <- as.integer(zaehlstellen$ZNR)
# Mergen mit Koordinaten durch die logs
gesamt_kfz_geo <- inner_join(gesamt_kfz, zaehlstellen, by = "ZNR")
gesamt_lkw_geo <- inner_join(gesamt_lkw, zaehlstellen, by = "ZNR")
ggplot() +
geom_point(data = gesamt_kfz_geo, aes(x = as.numeric(LONGITUDE), y = as.numeric(LATITUDE)),
color = "red", alpha = 0.6, size = 2) +
labs(title = "Zählstellen Wien", x = "Längengrad", y = "Breitengrad") +
theme_minimal()

# Clusteranalyse (Features) gesamte Woche
cluster_features <- c("DTVMO", "DTVDD", "DTVFR", "DTVSA", "DTVSF")
# Daten bereinigen
kfz_data <- gesamt_kfz_geo %>%
select(all_of(cluster_features)) %>%
mutate_all(as.numeric) %>%
drop_na()
lkw_data <- gesamt_lkw_geo %>%
select(all_of(cluster_features)) %>%
mutate_all(as.numeric) %>%
drop_na()
kfz_scaled <- scale(kfz_data)
lkw_scaled <- scale(lkw_data)
# Elbow-Methode kfz
fviz_nbclust(kfz_scaled, kmeans, method = "wss") +
labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) Kfz", x = "Anzahl Cluster (k)", y = "WSS")

# Elbow-Methode lkw
fviz_nbclust(lkw_scaled, kmeans, method = "wss") +
labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) LkwÄ", x = "Anzahl Cluster (k)", y = "WSS")

# K-Means Cluster
kfz_cluster <- kmeans(kfz_scaled, centers = 3, nstart = 25)
lkw_cluster <- kmeans(lkw_scaled, centers = 3, nstart = 25)
# Cluster-Zuordnung hinzufügen
gesamt_kfz_geo_clean <- gesamt_kfz_geo[complete.cases(kfz_data), ]
gesamt_kfz_geo_clean$Cluster <- factor(kfz_cluster$cluster)
gesamt_lkw_geo_clean <- gesamt_lkw_geo[complete.cases(lkw_data), ]
gesamt_lkw_geo_clean$Cluster <- factor(lkw_cluster$cluster)
print(gesamt_kfz_geo_clean)
## # A tibble: 6,402 × 28
## DATUM ZNR ZNAME.x STRTYP STRNR.x RINAME FZTYP DTVMS DTVMF DTVMO DTVDD
## <date> <int> <chr> <chr> <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2024-12-01 1075 Reichsb… B 8 Gesamt Kfz 31051 35485 32919 36601
## 2 2024-12-01 1078 Westbah… B 221 Gesamt Kfz 66551 68354 65484 68342
## 3 2024-12-01 1089 Florids… B 226 Gesamt Kfz 23962 26654 25003 27432
## 4 2024-12-01 1096 Brigitt… B 14 Gesamt Kfz 22181 25033 23812 25772
## 5 2024-12-01 1131 Karlspl… B 1 Gesamt Kfz 32842 36038 33930 36702
## 6 2024-12-01 1170 Donauka… B 227 Gesamt Kfz 59181 62840 60469 62766
## 7 2024-12-01 1177 Handels… B 14 Gesamt Kfz 21606 24222 23049 24871
## 8 2024-12-01 1179 Brunner… B 12 Gesamt Kfz 28079 32240 31695 32305
## 9 2024-12-01 1180 Hochstr… B 13 Gesamt Kfz 9884 11053 10524 11160
## 10 2024-12-01 1181 Breiten… B 13 Gesamt Kfz 11815 13057 12626 13158
## # ℹ 6,392 more rows
## # ℹ 17 more variables: DTVFR <dbl>, DTVSA <dbl>, DTVSF <dbl>, TVMAX <dbl>,
## # TVMAXT <date>, ISTCOVID19 <dbl>, ZNAME.y <chr>, STRNR.y <chr>,
## # RICHTUNG_1 <chr>, RICHTUNG_2 <chr>, LONGITUDE <dbl>, LATITUDE <dbl>,
## # BEZIRK_NAME <chr>, BEZIRK_PLZ <dbl>, BEZIRK_NR <dbl>, BEZIRK_CODE <dbl>,
## # Cluster <fct>
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_kfz_geo_clean$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_kfz_geo_clean) %>%
addTiles() %>%
addCircleMarkers(
lng = ~LONGITUDE,
lat = ~LATITUDE,
color = ~pal(Cluster),
label = ~paste("Cluster:", Cluster),
radius = 3,
fillOpacity = 0.7
)
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_lkw_geo_clean$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_lkw_geo_clean) %>%
addTiles() %>%
addCircleMarkers(
lng = ~LONGITUDE,
lat = ~LATITUDE,
color = ~pal(Cluster),
label = ~paste("Cluster:", Cluster),
radius = 3,
fillOpacity = 0.7
)
# Plot Cluster Kfz
ggplot(gesamt_kfz_geo_clean, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
geom_point(size = 2, alpha = 0.7) +
labs(title = "Clusteranalyse Kfz – Zählstellen Wien") +
theme_minimal()

# Plot Cluster Lkw
ggplot(gesamt_lkw_geo_clean, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
geom_point(size = 2, alpha = 0.7) +
labs(title = "Clusteranalyse LkwÄ – Zählstellen Wien") +
theme_minimal()

# Clusteranalyse (Features) der Tage Montag bis Freitag
cluster_features_mo_fr <- c("DTVMO", "DTVDD", "DTVFR")
# Daten bereinigen
kfz_data_mo_fr <- gesamt_kfz_geo %>%
select(all_of(cluster_features_mo_fr)) %>%
mutate_all(as.numeric) %>%
drop_na()
lkw_data_mo_fr <- gesamt_lkw_geo %>%
select(all_of(cluster_features_mo_fr)) %>%
mutate_all(as.numeric) %>%
drop_na()
kfz_scaled_mo_fr <- scale(kfz_data_mo_fr)
lkw_scaled_mo_fr <- scale(lkw_data_mo_fr)
# Elbow-Methode kfz
fviz_nbclust(kfz_scaled_mo_fr, kmeans, method = "wss") +
labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) Kfz", x = "Anzahl Cluster (k)", y = "WSS")

# Elbow-Methode lkw
fviz_nbclust(lkw_scaled_mo_fr, kmeans, method = "wss") +
labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) LkwÄ", x = "Anzahl Cluster (k)", y = "WSS")

# K-Means Cluster
kfz_cluster_mo_fr <- kmeans(kfz_scaled_mo_fr, centers = 3, nstart = 25)
lkw_cluster_mo_fr <- kmeans(lkw_scaled_mo_fr, centers = 4, nstart = 25)
# Cluster-Zuordnung hinzufügen
gesamt_kfz_geo_clean_mo_fr <- gesamt_kfz_geo[complete.cases(kfz_data_mo_fr), ]
gesamt_kfz_geo_clean_mo_fr$Cluster <- factor(kfz_cluster_mo_fr$cluster)
gesamt_lkw_geo_clean_mo_fr <- gesamt_lkw_geo[complete.cases(lkw_data_mo_fr), ]
gesamt_lkw_geo_clean_mo_fr$Cluster <- factor(lkw_cluster_mo_fr$cluster)
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_kfz_geo_clean_mo_fr$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_kfz_geo_clean_mo_fr) %>%
addTiles() %>%
addCircleMarkers(
lng = ~LONGITUDE,
lat = ~LATITUDE,
color = ~pal(Cluster),
label = ~paste("Cluster:", Cluster),
radius = 3,
fillOpacity = 0.7
)
farben <- c("1" = "red", "2" = "green", "3" = "blue", "4" = "purple")
pal <- colorFactor(palette = farben, domain = gesamt_lkw_geo_clean_mo_fr$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_lkw_geo_clean_mo_fr) %>%
addTiles() %>%
addCircleMarkers(
lng = ~LONGITUDE,
lat = ~LATITUDE,
color = ~pal(Cluster),
label = ~paste("Cluster:", Cluster),
radius = 3,
fillOpacity = 0.7
)
# Plot Cluster Kfz
ggplot(gesamt_kfz_geo_clean_mo_fr, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
geom_point(size = 2, alpha = 0.7) +
labs(title = "Clusteranalyse Kfz – Zählstellen Wien") +
theme_minimal()

# Plot Cluster Lkw
ggplot(gesamt_lkw_geo_clean_mo_fr, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
geom_point(size = 2, alpha = 0.7) +
labs(title = "Clusteranalyse LkwÄ – Zählstellen Wien") +
theme_minimal()

# Clusteranalyse (Features) nur Wochenende und Feiertage
cluster_features_we <- c("DTVSA", "DTVSF")
# Daten bereinigen
kfz_data_we <- gesamt_kfz_geo %>%
select(all_of(cluster_features_we)) %>%
mutate_all(as.numeric) %>%
drop_na()
lkw_data_we <- gesamt_lkw_geo %>%
select(all_of(cluster_features_we)) %>%
mutate_all(as.numeric) %>%
drop_na()
kfz_scaled_we <- scale(kfz_data_we)
lkw_scaled_we <- scale(lkw_data_we)
# Elbow-Methode kfz
fviz_nbclust(kfz_scaled_we, kmeans, method = "wss") +
labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) Kfz", x = "Anzahl Cluster (k)", y = "WSS")

# Elbow-Methode lkw
fviz_nbclust(lkw_scaled_we, kmeans, method = "wss") +
labs(title = "Elbow Method: Optimale Clusteranzahl (KMeans) LkwÄ", x = "Anzahl Cluster (k)", y = "WSS")

# K-Means Cluster
kfz_cluster_we <- kmeans(kfz_scaled_we, centers = 5, nstart = 25)
lkw_cluster_we <- kmeans(lkw_scaled_we, centers = 3, nstart = 25)
# Cluster-Zuordnung hinzufügen
gesamt_kfz_geo_clean_we <- gesamt_kfz_geo[complete.cases(kfz_data_we), ]
gesamt_kfz_geo_clean_we$Cluster <- factor(kfz_cluster_we$cluster)
gesamt_lkw_geo_clean_we <- gesamt_lkw_geo[complete.cases(lkw_data_we), ]
gesamt_lkw_geo_clean_we$Cluster <- factor(lkw_cluster_we$cluster)
farben <- c("1" = "red", "2" = "green", "3" = "turquoise","4" = "blue", "5" = "purple")
pal <- colorFactor(palette = farben, domain = gesamt_kfz_geo_clean_we$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_kfz_geo_clean_we) %>%
addTiles() %>%
addCircleMarkers(
lng = ~LONGITUDE,
lat = ~LATITUDE,
color = ~pal(Cluster),
label = ~paste("Cluster:", Cluster),
radius = 3,
fillOpacity = 0.7
)
farben <- c("1" = "red", "2" = "green", "3" = "blue")
pal <- colorFactor(palette = farben, domain = gesamt_lkw_geo_clean_we$cluster)
## Warning: Unknown or uninitialised column: `cluster`.
leaflet(gesamt_lkw_geo_clean_we) %>%
addTiles() %>%
addCircleMarkers(
lng = ~LONGITUDE,
lat = ~LATITUDE,
color = ~pal(Cluster),
label = ~paste("Cluster:", Cluster),
radius = 3,
fillOpacity = 0.7
)
# Plot Cluster Kfz
ggplot(gesamt_kfz_geo_clean_we, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
geom_point(size = 2, alpha = 0.7) +
labs(title = "Clusteranalyse Kfz – Zählstellen Wien") +
theme_minimal()

# Plot Cluster Lkw
ggplot(gesamt_lkw_geo_clean_we, aes(x = LONGITUDE, y = LATITUDE, color = Cluster)) +
geom_point(size = 2, alpha = 0.7) +
labs(title = "Clusteranalyse LkwÄ – Zählstellen Wien") +
theme_minimal()
